# If dropping all inds where both sea and sm age is NAsallaa %>%ggplot() +geom_density(aes(x = tot.age), fill ="deeppink", alpha =0.5) +xlim(0, 13) +sallaa %>%ggplot() +geom_density(aes(x = length_mm), fill ="deeppink", alpha =0.5)
biph4.code <-nimbleCode({# likelihoodfor(i in1:nobs){ length_mm[i] ~dnorm(l.mu[age.index[i], ind.id[i]], sd = sig.l) }# Estimate length at age for each individual (j in nn) and for each age of j (k in age.index=1 to age.index[j], i.e. age 0 to catch age)for(j in1:nn){ l.mu[1,j] <- lb.mu[spat.unit[j]]# old:+ g[spat.unit[j], hatch.year.f[j]] # agefor(k in2:age.index[j]+1){#Say that we for hatch.year=2010 want to predict length of an individual of age 3. If we then want to use environmental conditions in 2012 to predict length in 2013, year should be hatch.year + 2 = 2012 but here k = age + 1 = 4. Using k, we need hatch.year + k - 2. BUT, this makes for using hatch year for both k=1 (in l.mu[1,j]) and k=2. Furtherm. k-1 has different meaning in the step for smolt age. l.mu[k,j] <- l.mu[k-1,j] +step(smo.age[j] - (k-1))*exp(g[spat.unit[j], (hatch.year.f[j]+k-2)]) + (1-step(smo.age[j] - (k-1)))*((exp(par[spat.unit[j],(hatch.year.f[j]+k-2),1]) - l.mu[k-1,j])*(1-exp(-exp(par[spat.unit[j],(hatch.year.f[j]+k-2),2]) #*(1 + (tot.age[j] %% 1)) ))) } }# estimate smo.age. for(l in1:nn){ smo.age[l] ~dgamma(shape = sh.sm[spat.unit[l]], scale = sc.sm) }# Priors sig.l ~dexp(1/500)#lb.mu ~ dnorm(17, sd = 5) # ~15 to 20 mm, https://doi.org/10.1111/j.1095-8649.2009.02497.x sd arbitary/guesstimate# LKJ prior on correlation matrix, see NIMBLE manual p45. Ustar[1:npars,1:npars] ~dlkj_corr_cholesky(1.3, npars) # eta = 1.3 U[1:npars,1:npars] <-uppertri_mult_diag(Ustar[1:npars, 1:npars], sig_par[1:npars])# priors for smolt age and juvenile growth rate sc.sm <-0.5^2/2.5# shape = 2.5^2/.5^2 = 25, #From mean=shape*scale, var=shape*scale^2 -> shape = mean^2/var & scale = var/mean. mean=2.5, sd=0.5.for(i in1:nsu){ sh.sm[i] ~dnorm(25, sd =5) # dgamma shape param, sd is arbitrary lb.mu[i] ~dnorm(100, sd =25) # FIX priorfor(j in minyear:maxyear){ # Take into accuount hatch.year[j]+k-1 par[i,j,1:npars] ~dmnorm(mu_par[1:npars], cholesky = U[1:npars, 1:npars], prec_param =0) g[i,j] ~dnorm(mean = g_lmean, sd = g_lsd) # LOG SCALE and weak prior } } g_lsd <-sqrt(log(1+ (10^2) / (50^2))) # sqrt of variance (= sd) of the lognormal distr. 50 and 10 is arbitrary atm. g_lmean <-log(50) -0.5* g_lsd^2# mean of the lognorm distr. mu_par[1] ~dnorm(mean = l_lmean, sd = l_lsd) mu_par[2] ~dnorm(mean = k_lmean, sd = k_lsd) sig_par[1] ~dlnorm(0,1) sig_par[2] ~dlnorm(0,1)# K & l values from fb k_lsd <-sqrt(log(1+ (0.28^2) / (0.43^2))) k_lmean <-log(0.43) -0.5* k_lsd^2 l_lsd <-sqrt(log(1+ (278^2) / (1378^2))) l_lmean <-log(1378) -0.5* l_lsd^2 })# Function creating the Cholesky of the covar. matrix (p45 Nimble manual)uppertri_mult_diag <-nimbleFunction(run =function(mat =double(2), vec =double(1)) {returnType(double(2)) p <-length(vec) out <-matrix(nrow = p, ncol = p, init =FALSE)for(k in1:p) out[ , k] <- mat[ , k] * vec[k]return(out)# turn off buildDerivs for the i index}, buildDerivs =list(run =list(ignore =c('k'))))data <- sallaa %>%filter(!age.type =="sea.only",!is.na(year), year >2000 ) %>%slice_sample(n =2000) %>%mutate(# create smolt age (!IMPROVE from data using fi_lfs_code)smo.age =if_else(age.type =="both", juv.age, NA),sea.age =if_else(is.na(sea.age), 0, sea.age),age.index =as.integer(tot.age +1), # index for l.mu, + 1 to include age 0 individualshatch.year = year-tot.age,hatch.year.f =as.numeric(factor(hatch.year, levels =sort(unique(hatch.year)), labels =seq_along(sort(unique(hatch.year))))),year.f =as.integer(factor(year)),spat.unit =as.integer(factor(spat.unit))) %>%select(smo.age, tot.age, length_mm, age.index, year, spat.unit, hatch.year, hatch.year.f) %>%mutate(ind.id =row_number()) # ind id
filter: removed 58,915 rows (27%), 157,303 rows remaining
slice_sample: removed 155,303 rows (99%), 2,000 rows remaining
mutate: changed 977 values (49%) of 'sea.age' (977 fewer NAs)
converted 'spat.unit' from character to integer (0 new NA)
new variable 'smo.age' (double) with 6 unique values and 49% NA
new variable 'age.index' (integer) with 10 unique values and 0% NA
new variable 'hatch.year' (double) with 31 unique values and 0% NA
new variable 'hatch.year.f' (double) with 31 unique values and 0% NA
new variable 'year.f' (integer) with 25 unique values and 0% NA
select: dropped 20 variables (sai_cou_code, site, origin, weight_g, sea.age, …)
mutate: new variable 'ind.id' (integer) with 2,000 unique values and 0% NA
Show code
npars <-2# get max year by, needed as hatch.year is integer and the last year is not (simpler solution?). maxyear <-max(data$hatch.year.f) + data %>%filter(year ==max(year)) %>%filter(tot.age ==max(tot.age)) %>%distinct(tot.age) %>%pull(tot.age)
filter: removed 1,997 rows (>99%), 3 rows remaining
filter: removed one row (33%), 2 rows remaining
distinct: removed one row (50%), one row remaining
select: dropped 7 variables (tot.age, age.index, year, spat.unit, hatch.year, …)
Defining model
Building model
Setting data and initial values
Running calculate on model
[Note] Any error reports that follow may simply reflect missing values in model variables.
Checking model sizes and dimensions
[Note] This model is not fully initialized. This is not an error.
To see which variables are not initialized, use model$initializeInfo().
For more information on model initialization, see help(modelInitialization).
# NA l.mussum(is.na(biph4.model$l.mu))/length(biph4.model$l.mu) # percent negatives
[1] 0.6297273
Show code
# NAs are afterbiph4.model$l.mu[,1:10] # first 10 individuals
[,1] [,2] [,3] [,4] [,5] [,6] [,7]
[1,] 104.7519 144.7610 98.82879 98.82879 98.82879 114.8625 144.7610
[2,] 153.5169 191.9771 137.15697 137.84001 149.23281 154.4266 196.6969
[3,] 275.8306 NA 822.34277 1450.28433 NA 199.3655 996.7027
[4,] 3861.6090 NA 1642.57191 1551.25106 NA 428.6716 5852.4332
[5,] NA NA 1678.55802 1667.43926 NA 441.5382 NA
[6,] NA NA 1725.55493 1514.24446 NA NA NA
[7,] NA NA NA 1396.26817 NA NA NA
[8,] NA NA NA NA NA NA NA
[9,] NA NA NA NA NA NA NA
[10,] NA NA NA NA NA NA NA
[11,] NA NA NA NA NA NA NA
[,8] [,9] [,10]
[1,] 144.7610 101.5233 98.82879
[2,] 196.6969 165.4286 140.78293
[3,] NA 206.4830 187.46331
[4,] NA 266.3169 1541.24737
[5,] NA 784.9319 2292.35538
[6,] NA 1431.6856 NA
[7,] NA NA NA
[8,] NA NA NA
[9,] NA NA NA
[10,] NA NA NA
[11,] NA NA NA
samp <- biph4.sampsum <-summary(samp)# summary of values of sampled parametersvars_subset <-grep("^g\\[", rownames(sum$statistics), value =TRUE)summary(exp(sum$statistics[which(rownames(sum$statistics)%in%vars_subset),][,1]))
Min. 1st Qu. Median Mean 3rd Qu. Max.
41.63 48.05 48.97 48.46 49.14 53.93
Show code
vars_subset <-grep("^par\\[", rownames(sum$statistics), value =TRUE)vars_subset1 <-grep("1]", vars_subset, value =TRUE)summary(exp(sum$statistics[which(rownames(sum$statistics)%in%vars_subset1),][,1]))
Min. 1st Qu. Median Mean 3rd Qu. Max.
722.9 974.1 980.9 984.3 984.7 1429.1
Show code
vars_subset2 <-grep("2]", vars_subset, value =TRUE)summary(exp(sum$statistics[which(rownames(sum$statistics)%in%vars_subset2),][,1]))
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.3455 0.8524 0.8611 0.8812 0.8771 1.6282
Show code
vars_subset <-grep("^lb.mu\\[", rownames(sum$statistics), value =TRUE)summary(sum$statistics[which(rownames(sum$statistics)%in%vars_subset),][,1])
Min. 1st Qu. Median Mean 3rd Qu. Max.
19.57 63.67 72.19 72.16 84.12 121.13
Show code
vars_subset <-grep("sig.l", rownames(sum$statistics), value =TRUE)sum$statistics[which(rownames(sum$statistics)%in%vars_subset),]
Mean SD Naive SE Time-series SE
38.651434433 0.662647114 0.006626471 0.018106568
Show code
# some trace plotsrn <-sample(1:200, 20)vars_subset <-grep("^g\\[", varnames(samp), value =TRUE)[rn]traceplot(samp[, vars_subset, drop =FALSE])
Show code
vars_subset <-grep("^par\\[", varnames(samp), value =TRUE)[rn]traceplot(samp[, vars_subset, drop =FALSE])
Show code
vars_subset <-grep("^lb.mu\\[", varnames(samp), value =TRUE)traceplot(samp[, vars_subset, drop =FALSE])
Show code
vars_subset <-grep("sig.l", varnames(samp), value =TRUE)traceplot(samp[, vars_subset, drop =FALSE])
# growth curves not looking ok. Should plot the individual curves and summarise the mean by su curvesamp %>%spread_draws(par[su,hy,p], g[su,hy], lb.mu[su], sep =",") %>%mutate(g =exp(g),par =exp(par)) %>%group_by(.draw,su,p) %>%summarise(par.m =mean(par), g.m =mean(g), lb.mu =mean(lb.mu), .groups ="keep") %>%group_by(su,p) %>%median_qi() %>%mutate(p =case_when(p ==1~"Linf", p ==2~"K",.default =NA)) %>%pivot_wider(id_cols = su, names_from = p, values_from =c(par.m, g.m, lb.mu)) %>%expand_grid(age =seq(0, 12, 0.1)) %>%mutate(length.a = par.m_Linf*(1-exp(-par.m_K*age)),length.j = lb.mu_Linf + g.m_Linf) %>%ggplot() +geom_point(data = sallaa %>%filter(!age.type =="sea.only"), aes(tot.age, length_mm), alpha =0.1, color ="lightblue") +geom_line(aes(age, length.a, color =factor(su))) +geom_line(aes(age, length.j, color =factor(su))) +theme_light()
mutate (grouped): changed 7,920,000 values (100%) of 'par' (0 new NAs)
changed 7,920,000 values (100%) of 'g' (0 new NAs)
group_by: 3 grouping variables (.draw, su, p)
summarise: now 240,000 rows and 6 columns, 3 group variables remaining (.draw, su, p)
group_by: 2 grouping variables (su, p)
mutate: converted 'p' from integer to character (0 new NA)
pivot_wider: reorganized (p, par.m, par.m.lower, par.m.upper, g.m, …) into (par.m_Linf, par.m_K, g.m_Linf, g.m_K, lb.mu_Linf, …) [was 24x14, now 12x7]
mutate: new variable 'length.a' (double) with 1,441 unique values and 0% NA
new variable 'length.j' (double) with 12 unique values and 0% NA
filter: removed 15,600 rows (7%), 200,618 rows remaining
Check l.mu
Show code
#samp.lmu <- biph4.samp.lmu# remove NA nodes (the older-than-catch-age nodes)cm <-lapply(biph4.samp.lmu, as.matrix)nonNA_nodes <- cm %>%map(~colnames(.x)[colSums(is.na(.x)) ==0]) %>%reduce(intersect) # applies function intersect over elements of the lists returned by map(), i.e. removing the colnames from both lists.cm_cleaned <-lapply(cm, function(x) x[, nonNA_nodes, drop =FALSE])samp.cl <-mcmc.list(lapply(cm_cleaned, mcmc)) #sum <- summary(samp.cl) # not working when l.mu == NA# some traces of l.muvars_subset <-grep("^l.mu\\[", varnames(samp.cl), value =TRUE)[rn]traceplot(samp.cl[, vars_subset, drop =FALSE])
Plot predicted vs observed l.mu
Show code
# plot observed against predictedobs <- data %>%select(length_mm,tot.age,ind.id) %>%mutate(tot.age =round(tot.age),observed = length_mm)
select: dropped 6 variables (smo.age, age.index, year, spat.unit, hatch.year, …)
mutate: new variable 'observed' (double) with 392 unique values and 0% NA
rename: renamed one variable (predicted)
mutate: new variable 'tot.age' (double) with 11 unique values and 0% NA
filter: removed 2 rows (<1%), 8,144 rows remaining